##------------------------------------
##
##         DATA PREAMBLE
##
##    Presidential Directives
##   in a Resistant Bureaucracy
##
##    Journal of Public Policy
##
##------------------------------------



## locate the replication files: switch to the author's project directory when
## it exists on this machine; otherwise stay in the current working directory,
## which must then contain the input files
data.dir <- path.expand('~/Dropbox/research/book/implementation/jpp/dataverse')
if (dir.exists(data.dir)) {
    setwd(data.dir)
} else if (!file.exists('ua_1983_2016.csv')) {
    stop("cannot find 'ua_1983_2016.csv': run this script from the directory ",
         'that holds the replication files', call. = FALSE)
}

## source code to create 'replication_data.csv'
##  (further source code is available upon request)


#### preamble

x <- read.csv(file = 'ua_1983_2016.csv')

## subsets and recodes
# keep rows with nprm.planned == 1, merged == 0 and one of three priority
# values; which() drops rows whose condition is NA, as subset() does
priority.levels <- c('Economically Significant',
                     'Other Significant',
                     'Substantive, Nonsignificant')
keep <- x$nprm.planned == 1 & x$merged == 0 &
    !is.na(x$priority) & x$priority %in% priority.levels
x <- x[which(keep), , drop = FALSE]

# flag rows that are direct final or interim final, then keep only rows that
# are not flagged, have anprm == 0 and have year.init after 1994
x$direct.rules <- ifelse(x$direct.final == 1 | x$interim.final == 1, 1, 0)
keep <- x$direct.rules == 0 & x$anprm == 0 & x$year.init > 1994
x <- x[which(keep), , drop = FALSE]

# 0/1 recodes of the 'yes' flags and of the final-rule stage
x$rfa <- ifelse(x$rfa == 'yes', 1, 0)
x$unfun.mandate <- ifelse(x$unfun.mandate == 'yes', 1, 0)
x$final.rule <- ifelse(x$stage == 'Final Rule', 1, 0)

# drop infrequent regulators: keep an agency.id only when its row count is at
# least the number of distinct year.init values
n <- length(table(x$year.init)) # 21
agency.counts <- table(x$agency.id)
m <- names(agency.counts)[agency.counts >= n]
m <- m[m != 'NA-NA']
x <- x[x$agency.id %in% m, , drop = FALSE]

## create party indicators
# every admin.init value other than 'bush43' is coded 'dem'
x$party <- ifelse(x$admin.init != 'bush43', 'dem', 'rep')
# NOTE(review): agency.abb is read here before it is rebuilt from the recoded
# agency.id further down -- confirm the pre-recode value is intended
x$agency.party <- paste(x$agency.abb, x$party, sep = '-')

## regulation stakes index
##   one-factor model on five indicators; regression scores become reg.stakes
stakes.vars <- c('sig', 'rfa', 'gov.levels', 'legal.dline', 'unfun.mandate')
x$sig <- ifelse(x$priority == 'Substantive, Nonsignificant', 0, 1)
# the factor scores are assigned back row by row, so drop incomplete rows first
x <- x[complete.cases(x[, stakes.vars]), , drop = FALSE]
v <- x[, stakes.vars]
f <- factanal(v, factors = 1, rotation = 'varimax', scores = 'regression')
x$reg.stakes <- f$scores[, 1]

## removal-right indicator
##   commission = 1 for the agency abbreviations listed below;
##   removal.right = 1 - commission
# read.csv() returns factors under R < 4.0.0, and assigning a value that is
# not an existing level to a factor yields NA (with a warning); coerce first
# so the recodes below work under any R version
x$agency.id <- as.character(x$agency.id)
# recode so that the prefix extracted below is STB / FERC / FHFA
x$agency.id[x$agency.id == 'DOT-STB'] <- 'STB-STB'
x$agency.id[x$agency.id == 'DOE-FERC'] <- 'FERC-FERC'
x$agency.id[x$agency.id == 'HUD-FHFA'] <- 'FHFA-FHFA'
# agency.abb is the part of agency.id before the last '-'
x$agency.abb <- gsub('^(.*)-.*$', '\\1', x$agency.id)
# NOTE(review): the list contains 'FHFB' while the recode above produces
# 'FHFA' -- confirm whether 'FHFA' should be flagged as well
commissions <- c('CFTC', 'SEC', 'FERC', 'FDIC', 'FCA', 'RRB', 'FHFB', 'STB',
                 'FCC', 'CPSC', 'FTC', 'FMC', 'NRC')
x$commission <- ifelse(x$agency.abb %in% commissions, 1, 0)
# %in% maps NA to FALSE; keep a missing abbreviation missing, as `==` would
x$commission[is.na(x$agency.abb)] <- NA
x$removal.right <- 1 - x$commission

## compliance indicator
##   1 when ua.date.first falls strictly before date.nprm, 0 otherwise
x$post.before.nprm <- ifelse(as.Date(x$date.nprm) > as.Date(x$ua.date.first),
                             1, 0)


## -- data to add --:
##   1. OIRA data
##   2. agency independence data
##   3. presidential approval
##   4. president's seat share in senate
##   5. agency ideology


## 1. OIRA data
##    source: reginfo.gov

load('oira_data.RData')
y <- oira.data
colnames(y) <- c('agency.code', 'rin', 'rule', 'stage', 'econ.sig',
                 'date.received', 'legal.dline', 'date.completed',
                 'decision', 'date.published')
y <- y[, colnames(y) != 'rule', drop = FALSE]
# parse the review dates
y$date.received <- as.Date(y$date.received, "%Y-%m-%d")
y$date.completed <- as.Date(y$date.completed, "%Y-%m-%d")

## prepare merge
# every row of the OIRA file is a review
y$review <- rep(1, nrow(y))
nrow(y) # 42,014
# keep one row per RIN: after sorting, the earliest date.received comes first
y <- y[order(y$rin, y$date.received), ]
y <- y[!duplicated(y$rin), , drop = FALSE]
# keep only the columns that are merged in; 'stage' is renamed first so it
# does not collide with the 'stage' column of x
colnames(y)[colnames(y) == 'stage'] <- 'oira.stage'
y <- y[, c('rin', 'review', 'decision', 'oira.stage', 'date.received')]
## merge by rin; rows of x without a match get review = 0
x <- merge(x, y, by = 'rin', all.x = TRUE)
x$review[is.na(x$review)] <- 0
nrow(x) # 19,562

sum(x$review) # 4,375
## make sure review happened prior to NPRM
##   (rows where either date is missing are left unchanged)
x$review[which(as.Date(x$date.nprm) < as.Date(x$date.received))] <- 0
sum(x$review) # 3,780

## ----- OIRA review indicators
##   each indicator is 1 when decision is one of the listed labels and 0
##   otherwise (a missing decision gives 0)
# change
x$change <- as.numeric(x$decision %in% c('Consistent with Change',
                                         'Consistent with change'))
# accept
x$accept <- as.numeric(x$decision %in% c('Consistent without Change',
                                         'Consistent w/no change',
                                         'Emergency case',
                                         'Deadline case',
                                         'Statutory or Judicial Deadline'))
# returned
x$returned <- as.numeric(x$decision %in% c('Withdrawn by agency',
                                           'Withdrawn',
                                           'Returned for Reconsideration',
                                           'Returned (reconsider)',
                                           'Returned (improper)',
                                           'Improperly Submitted'))
# recode variables
x$not.returned <- 1 - x$returned
# focus on 'proposed rule' submissions to OIRA
#   (NA when oira.stage is missing)
x$oira.stage.proposed <- ifelse(x$oira.stage == 'Proposed Rule', 1, 0)
## random effect should really be at the agency-party/admin level
##   (this reflects the ideological gap variable)
# first four characters of date.received, i.e. the year as text
x$year.received <- substr(x$date.received, 1, 4)




##  2. agency independence
##     source: Selin (2015) What Makes an Agency Independent?

##  ----- import selin data ----
## e <- read.csv(file = 'selin_estimates.csv')
## r <- read.csv(file = 'agencycodes.csv')
## r$agency.id <- paste(r$agency.abb, r$bureau.abb, sep = '-')
## x <- read.csv(file = '~/Dropbox/research/book/implementation/ua_data3.csv')
## r <- subset(r, r$agency.id %in% unique(x$agency.id))
## r <- subset(r, select = c(agency.id, bureau))
## colnames(r)[2] <- 'agency'
## e <- subset(e, select = -c(agency.id))
## v <- merge(e, r, by = 'agency', all.x = T)
## v <- subset(v, select = c(agency, agency.id, d1.estimate, d2.estimate))
## save and hand code those that didn't merge
## import after hand edited merge
y <- read.csv(file = 'selin_merge_hand_edits.csv')
# rename d1.estimate to selin.d1, then left-join onto x by agency.id
names(y)[names(y) == 'd1.estimate'] <- 'selin.d1'
x <- merge(x, y, by = 'agency.id', all.x = TRUE)


## 3. presidential approval
##    source: gallup

p <- read.csv(file = 'presidential_approval.csv')
p$date <- as.Date(p$start.date, '%m/%d/%y')
p$year <- substr(p$date, 1, 4)
# net approval = approving minus disapproving, averaged within each year
p$net.approval <- p$Approving - p$Disapproving
a <- aggregate(net.approval ~ year, data = p, FUN = mean)
## the merge key 'year' is the year the rule was initiated
x$year <- x$year.init
## left-join the yearly averages onto x
x <- merge(x, a, by = 'year', all.x = TRUE)

## 4. president's seat share in senate
##    source: https://www.senate.gov/history/partydiv.htm
##
## r is a two-column numeric matrix (year, pres.senate.seats) for 1993-2015.
## The values are entered once per two-year Congress and repeated for both of
## its years, so the two years of a Congress cannot disagree. The previous
## year-by-year table had 2010 = 51 although 2009 = 57 (same Congress).

senate.years <- 1993:2015
# one entry per Congress, starting with the one that begins in 1993
seats.by.congress <- c(57,  # 1993-1994
                       48,  # 1995-1996
                       45,  # 1997-1998
                       45,  # 1999-2000
                       50,  # 2001-2002
                       51,  # 2003-2004
                       55,  # 2005-2006
                       49,  # 2007-2008
                       57,  # 2009-2010
                       51,  # 2011-2012
                       53,  # 2013-2014
                       44)  # 2015-
# expand to one value per year and cut to the years covered
senate.seats <- rep(seats.by.congress, each = 2)[seq_along(senate.years)]
r <- cbind(year = senate.years, pres.senate.seats = senate.seats)
# left-join the yearly seat counts onto x (merge() needs a data frame)
x <- merge(x, as.data.frame(r), by = 'year', all.x = TRUE)


## 5. agency loyalty
##    source: Clinton and Lewis (2008)

a <- read.csv(file = 'agencycodes.csv', stringsAsFactors = FALSE)
a <- a[, c('agency.code', 'ideology1')]
x <- merge(x, a, by = 'agency.code', all.x = TRUE)

x$ideology1 <- as.numeric(x$ideology1)

# row masks for the two groups of administrations
is.rep <- x$admin.init == 'bush43'
is.dem <- x$admin.init == 'clinton' | x$admin.init == 'obama'

# ideology.gap: distance from the largest ideology1 value for bush43 rows and
# from the smallest ideology1 value for clinton/obama rows
x$ideology.gap <- rep(NA, nrow(x))
x$ideology.gap[is.rep] <- abs(max(x$ideology1, na.rm = TRUE) -
                              x$ideology1[is.rep])
x$ideology.gap[is.dem] <- abs(min(x$ideology1, na.rm = TRUE) -
                              x$ideology1[is.dem])

# agency.loyalty: the group's largest gap minus the row's own gap, so larger
# values mean a smaller gap
x$agency.loyalty <- rep(NA, nrow(x))
x$agency.loyalty[is.rep] <- abs(max(x$ideology.gap[is.rep], na.rm = TRUE) -
                                x$ideology.gap[is.rep])
x$agency.loyalty[is.dem] <- abs(max(x$ideology.gap[is.dem], na.rm = TRUE) -
                                x$ideology.gap[is.dem])


# drop rows without an ideology gap
x <- x[!is.na(x$ideology.gap), , drop = FALSE]
dim(x)


## select variables for analysis and save

analysis.vars <- c('agency.abb', 'agency.id', 'party', 'agency.party',
                   'year.init',
                   'post.before.nprm',
                   'reg.stakes',
                   'removal.right',
                   'selin.d1', 'net.approval', 'pres.senate.seats',
                   'agency.loyalty', 'ideology1',
                   'rfa', 'gov.levels', 'legal.dline', 'unfun.mandate', 'sig',
                   'review', 'not.returned', 'oira.stage.proposed',
                   'stage')
m <- x[, analysis.vars, drop = FALSE]


# write without row names so the file holds only the selected columns
write.csv(m, file = 'replication_data.csv', row.names = FALSE)
